import json
import random
import time
from typing import Union
from fastapi import FastAPI
# FastAPI application instance; route handlers below register against it.
# NOTE(review): placed mid-imports — conventionally this would sit after all imports.
app = FastAPI()
from modelscope import snapshot_download
from transformers import AutoModelForCausalLM, AutoTokenizer

# Download (or reuse a cached copy of) the Qwen-7B-Chat weights into ./check;
# returns the local directory path used to load tokenizer and model below.
model_dir = snapshot_download('qwen/Qwen-7B-Chat',cache_dir='./check')


# Qwen ships custom modeling/tokenization code, hence trust_remote_code=True.
tokenizer = AutoTokenizer.from_pretrained(model_dir, trust_remote_code=True)
# device_map="auto" lets accelerate place layers across available devices;
# .eval() disables dropout etc. for inference-only serving.
model = AutoModelForCausalLM.from_pretrained(
    model_dir,
    device_map="auto",
    trust_remote_code=True
).eval()


@app.get("/app/get_response")
def read_item(message: str, history: str):
    """Chat endpoint: run one turn of Qwen-7B-Chat.

    Query parameters:
        message: JSON-encoded user message (string) for this turn.
        history: JSON-encoded prior conversation history, as produced by a
            previous call's returned history (model.chat's history format).

    Returns:
        A 2-tuple (answer, updated_history), serialized by FastAPI as a
        JSON array of two elements.

    Raises:
        json.JSONDecodeError: if either query parameter is not valid JSON
            (surfaces as a 500 from FastAPI unless handled upstream).
    """
    # BUG FIX: the original passed json.loads(message) as history, so the
    # conversation context was replaced by the current message every call.
    answer, history = model.chat(
        tokenizer,
        json.loads(message),
        history=json.loads(history),
    )
    return answer, history


